{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导入包\n",
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/chsl/.local/lib/python3.6/site-packages/IPython/core/interactiveshell.py:3018: DtypeWarning: Columns (12,18) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "train = pd.read_csv(\"Train.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>City</th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>DOB</th>\n",
       "      <th>Lead_Creation_Date</th>\n",
       "      <th>Loan_Amount_Applied</th>\n",
       "      <th>Loan_Tenure_Applied</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Employer_Name</th>\n",
       "      <th>...</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Filled_Form</th>\n",
       "      <th>Device_Type</th>\n",
       "      <th>Var2</th>\n",
       "      <th>Source</th>\n",
       "      <th>Var4</th>\n",
       "      <th>LoggedIn</th>\n",
       "      <th>Disbursed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ID000002C20</td>\n",
       "      <td>Female</td>\n",
       "      <td>Delhi</td>\n",
       "      <td>20000</td>\n",
       "      <td>23-May-78</td>\n",
       "      <td>15-May-15</td>\n",
       "      <td>300000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>CYBOSOL</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ID000004E40</td>\n",
       "      <td>Male</td>\n",
       "      <td>Mumbai</td>\n",
       "      <td>35000</td>\n",
       "      <td>07-Oct-85</td>\n",
       "      <td>04-May-15</td>\n",
       "      <td>200000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>TATA CONSULTANCY SERVICES LTD (TCS)</td>\n",
       "      <td>...</td>\n",
       "      <td>13.25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6762.9</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ID000007H20</td>\n",
       "      <td>Male</td>\n",
       "      <td>Panchkula</td>\n",
       "      <td>22500</td>\n",
       "      <td>10-Oct-81</td>\n",
       "      <td>19-May-15</td>\n",
       "      <td>600000.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ALCHEMIST HOSPITALS LTD</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>B</td>\n",
       "      <td>S143</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ID000008I30</td>\n",
       "      <td>Male</td>\n",
       "      <td>Saharsa</td>\n",
       "      <td>35000</td>\n",
       "      <td>30-Nov-87</td>\n",
       "      <td>09-May-15</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BIHAR GOVERNMENT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>B</td>\n",
       "      <td>S143</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ID000009J40</td>\n",
       "      <td>Male</td>\n",
       "      <td>Bengaluru</td>\n",
       "      <td>100000</td>\n",
       "      <td>17-Feb-84</td>\n",
       "      <td>20-May-15</td>\n",
       "      <td>500000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>GLOBAL EDGE SOFTWARE</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>B</td>\n",
       "      <td>S134</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            ID  Gender       City  Monthly_Income        DOB  \\\n",
       "0  ID000002C20  Female      Delhi           20000  23-May-78   \n",
       "1  ID000004E40    Male     Mumbai           35000  07-Oct-85   \n",
       "2  ID000007H20    Male  Panchkula           22500  10-Oct-81   \n",
       "3  ID000008I30    Male    Saharsa           35000  30-Nov-87   \n",
       "4  ID000009J40    Male  Bengaluru          100000  17-Feb-84   \n",
       "\n",
       "  Lead_Creation_Date  Loan_Amount_Applied  Loan_Tenure_Applied  Existing_EMI  \\\n",
       "0          15-May-15             300000.0                  5.0           0.0   \n",
       "1          04-May-15             200000.0                  2.0           0.0   \n",
       "2          19-May-15             600000.0                  4.0           0.0   \n",
       "3          09-May-15            1000000.0                  5.0           0.0   \n",
       "4          20-May-15             500000.0                  2.0       25000.0   \n",
       "\n",
       "                         Employer_Name    ...    Interest_Rate Processing_Fee  \\\n",
       "0                              CYBOSOL    ...              NaN            NaN   \n",
       "1  TATA CONSULTANCY SERVICES LTD (TCS)    ...            13.25            NaN   \n",
       "2              ALCHEMIST HOSPITALS LTD    ...              NaN            NaN   \n",
       "3                     BIHAR GOVERNMENT    ...              NaN            NaN   \n",
       "4                 GLOBAL EDGE SOFTWARE    ...              NaN            NaN   \n",
       "\n",
       "  EMI_Loan_Submitted Filled_Form  Device_Type  Var2  Source  Var4 LoggedIn  \\\n",
       "0                NaN           N  Web-browser     G    S122     1        0   \n",
       "1             6762.9           N  Web-browser     G    S122     3        0   \n",
       "2                NaN           N  Web-browser     B    S143     1        0   \n",
       "3                NaN           N  Web-browser     B    S143     3        0   \n",
       "4                NaN           N  Web-browser     B    S134     3        1   \n",
       "\n",
       "  Disbursed  \n",
       "0       0.0  \n",
       "1       0.0  \n",
       "2       0.0  \n",
       "3       0.0  \n",
       "4       0.0  \n",
       "\n",
       "[5 rows x 26 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 87020 entries, 0 to 87019\n",
      "Data columns (total 26 columns):\n",
      "ID                       87020 non-null object\n",
      "Gender                   87020 non-null object\n",
      "City                     86017 non-null object\n",
      "Monthly_Income           87020 non-null int64\n",
      "DOB                      87020 non-null object\n",
      "Lead_Creation_Date       87020 non-null object\n",
      "Loan_Amount_Applied      86949 non-null float64\n",
      "Loan_Tenure_Applied      86949 non-null float64\n",
      "Existing_EMI             86949 non-null float64\n",
      "Employer_Name            86949 non-null object\n",
      "Salary_Account           75256 non-null object\n",
      "Mobile_Verified          87020 non-null object\n",
      "Var5                     87020 non-null object\n",
      "Var1                     87019 non-null object\n",
      "Loan_Amount_Submitted    52407 non-null float64\n",
      "Loan_Tenure_Submitted    52407 non-null float64\n",
      "Interest_Rate            27726 non-null float64\n",
      "Processing_Fee           27420 non-null float64\n",
      "EMI_Loan_Submitted       27727 non-null object\n",
      "Filled_Form              87020 non-null object\n",
      "Device_Type              87020 non-null object\n",
      "Var2                     87020 non-null object\n",
      "Source                   87020 non-null object\n",
      "Var4                     87020 non-null int64\n",
      "LoggedIn                 87020 non-null int64\n",
      "Disbursed                87019 non-null float64\n",
      "dtypes: float64(8), int64(3), object(15)\n",
      "memory usage: 17.3+ MB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1.对特征工程处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.,  1., nan])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['Disbursed'].unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "竟然有一行数据是空的结果，将空的行清理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    87019\n",
       "True         1\n",
       "Name: Disbursed, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['Disbursed'].isnull().value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[62689]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['Disbursed'] = train['Disbursed'].fillna(999)\n",
    "train[(train.Disbursed==999)].index.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = train.drop([62689,])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 1.])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['Disbursed'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 87019 entries, 0 to 87019\n",
      "Data columns (total 26 columns):\n",
      "ID                       87019 non-null object\n",
      "Gender                   87019 non-null object\n",
      "City                     86016 non-null object\n",
      "Monthly_Income           87019 non-null int64\n",
      "DOB                      87019 non-null object\n",
      "Lead_Creation_Date       87019 non-null object\n",
      "Loan_Amount_Applied      86948 non-null float64\n",
      "Loan_Tenure_Applied      86948 non-null float64\n",
      "Existing_EMI             86948 non-null float64\n",
      "Employer_Name            86948 non-null object\n",
      "Salary_Account           75255 non-null object\n",
      "Mobile_Verified          87019 non-null object\n",
      "Var5                     87019 non-null object\n",
      "Var1                     87019 non-null object\n",
      "Loan_Amount_Submitted    52407 non-null float64\n",
      "Loan_Tenure_Submitted    52407 non-null float64\n",
      "Interest_Rate            27726 non-null float64\n",
      "Processing_Fee           27420 non-null float64\n",
      "EMI_Loan_Submitted       27726 non-null object\n",
      "Filled_Form              87019 non-null object\n",
      "Device_Type              87019 non-null object\n",
      "Var2                     87019 non-null object\n",
      "Source                   87019 non-null object\n",
      "Var4                     87019 non-null int64\n",
      "LoggedIn                 87019 non-null int64\n",
      "Disbursed                87019 non-null float64\n",
      "dtypes: float64(8), int64(3), object(15)\n",
      "memory usage: 17.9+ MB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "DOB为出生日期，转为年龄"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#转换为标准时间格式\n",
    "import datetime;\n",
    "now_year = datetime.datetime.today().year\n",
    "\n",
    "train['DOB'] = pd.to_datetime(train['DOB'])\n",
    "train['age'] = now_year - train['DOB'].dt.year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>City</th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>Lead_Creation_Date</th>\n",
       "      <th>Loan_Amount_Applied</th>\n",
       "      <th>Loan_Tenure_Applied</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Employer_Name</th>\n",
       "      <th>Salary_Account</th>\n",
       "      <th>Mobile_Verified</th>\n",
       "      <th>...</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Filled_Form</th>\n",
       "      <th>Device_Type</th>\n",
       "      <th>Var2</th>\n",
       "      <th>Source</th>\n",
       "      <th>Var4</th>\n",
       "      <th>Disbursed</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>172</td>\n",
       "      <td>2318</td>\n",
       "      <td>44</td>\n",
       "      <td>133</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8691</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>73</td>\n",
       "      <td>571</td>\n",
       "      <td>4545</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>446</td>\n",
       "      <td>4315</td>\n",
       "      <td>11</td>\n",
       "      <td>86</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>38690</td>\n",
       "      <td>22</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>571</td>\n",
       "      <td>3733</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>497</td>\n",
       "      <td>2831</td>\n",
       "      <td>56</td>\n",
       "      <td>216</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1833</td>\n",
       "      <td>44</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>73</td>\n",
       "      <td>571</td>\n",
       "      <td>4545</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>47</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>568</td>\n",
       "      <td>4315</td>\n",
       "      <td>26</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>5698</td>\n",
       "      <td>44</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>73</td>\n",
       "      <td>571</td>\n",
       "      <td>4545</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>87</td>\n",
       "      <td>6</td>\n",
       "      <td>59</td>\n",
       "      <td>193</td>\n",
       "      <td>3</td>\n",
       "      <td>1514</td>\n",
       "      <td>13513</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>73</td>\n",
       "      <td>571</td>\n",
       "      <td>4545</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Gender  City  Monthly_Income  Lead_Creation_Date  Loan_Amount_Applied  \\\n",
       "0       0   172            2318                  44                  133   \n",
       "1       1   446            4315                  11                   86   \n",
       "2       1   497            2831                  56                  216   \n",
       "3       1   568            4315                  26                    6   \n",
       "4       1    87               6                  59                  193   \n",
       "\n",
       "   Loan_Tenure_Applied  Existing_EMI  Employer_Name  Salary_Account  \\\n",
       "0                    6             0           8691              20   \n",
       "1                    3             0          38690              22   \n",
       "2                    5             0           1833              44   \n",
       "3                    6             0           5698              44   \n",
       "4                    3          1514          13513              20   \n",
       "\n",
       "   Mobile_Verified ...   Interest_Rate  Processing_Fee  EMI_Loan_Submitted  \\\n",
       "0                0 ...              73             571                4545   \n",
       "1                1 ...               3             571                3733   \n",
       "2                1 ...              73             571                4545   \n",
       "3                1 ...              73             571                4545   \n",
       "4                1 ...              73             571                4545   \n",
       "\n",
       "   Filled_Form  Device_Type  Var2  Source  Var4  Disbursed  age  \n",
       "0            0            1     6       0     1          0   50  \n",
       "1            0            1     6       0     3          0   43  \n",
       "2            0            1     1      16     1          0   47  \n",
       "3            0            1     1      16     3          0   41  \n",
       "4            0            1     1       8     3          0   44  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>City</th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>DOB</th>\n",
       "      <th>Lead_Creation_Date</th>\n",
       "      <th>Loan_Amount_Applied</th>\n",
       "      <th>Loan_Tenure_Applied</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Employer_Name</th>\n",
       "      <th>...</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Filled_Form</th>\n",
       "      <th>Device_Type</th>\n",
       "      <th>Var2</th>\n",
       "      <th>Source</th>\n",
       "      <th>Var4</th>\n",
       "      <th>LoggedIn</th>\n",
       "      <th>Disbursed</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>172</td>\n",
       "      <td>2318</td>\n",
       "      <td>2998</td>\n",
       "      <td>44</td>\n",
       "      <td>133</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8691</td>\n",
       "      <td>...</td>\n",
       "      <td>571</td>\n",
       "      <td>4545</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>446</td>\n",
       "      <td>4315</td>\n",
       "      <td>5669</td>\n",
       "      <td>11</td>\n",
       "      <td>86</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>38690</td>\n",
       "      <td>...</td>\n",
       "      <td>571</td>\n",
       "      <td>3733</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>497</td>\n",
       "      <td>2831</td>\n",
       "      <td>4212</td>\n",
       "      <td>56</td>\n",
       "      <td>216</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1833</td>\n",
       "      <td>...</td>\n",
       "      <td>571</td>\n",
       "      <td>4545</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>47</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>568</td>\n",
       "      <td>4315</td>\n",
       "      <td>6453</td>\n",
       "      <td>26</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>5698</td>\n",
       "      <td>...</td>\n",
       "      <td>571</td>\n",
       "      <td>4545</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>87</td>\n",
       "      <td>6</td>\n",
       "      <td>5071</td>\n",
       "      <td>59</td>\n",
       "      <td>193</td>\n",
       "      <td>3</td>\n",
       "      <td>1514</td>\n",
       "      <td>13513</td>\n",
       "      <td>...</td>\n",
       "      <td>571</td>\n",
       "      <td>4545</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID  Gender  City  Monthly_Income   DOB  Lead_Creation_Date  \\\n",
       "0   0       0   172            2318  2998                  44   \n",
       "1   1       1   446            4315  5669                  11   \n",
       "2   2       1   497            2831  4212                  56   \n",
       "3   3       1   568            4315  6453                  26   \n",
       "4   4       1    87               6  5071                  59   \n",
       "\n",
       "   Loan_Amount_Applied  Loan_Tenure_Applied  Existing_EMI  Employer_Name ...   \\\n",
       "0                  133                    6             0           8691 ...    \n",
       "1                   86                    3             0          38690 ...    \n",
       "2                  216                    5             0           1833 ...    \n",
       "3                    6                    6             0           5698 ...    \n",
       "4                  193                    3          1514          13513 ...    \n",
       "\n",
       "   Processing_Fee  EMI_Loan_Submitted  Filled_Form  Device_Type  Var2  Source  \\\n",
       "0             571                4545            0            1     6       0   \n",
       "1             571                3733            0            1     6       0   \n",
       "2             571                4545            0            1     1      16   \n",
       "3             571                4545            0            1     1      16   \n",
       "4             571                4545            0            1     1       8   \n",
       "\n",
       "   Var4  LoggedIn  Disbursed  age  \n",
       "0     1         0          0   50  \n",
       "1     3         0          0   43  \n",
       "2     1         0          0   47  \n",
       "3     3         0          0   41  \n",
       "4     3         1          0   44  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "labelencoder = LabelEncoder()\n",
    "for col in train.columns:\n",
    "    train[col] = labelencoder.fit_transform(train[col].astype(str))\n",
    "    \n",
    "train.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据分开x，y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train = train['Disbursed']\n",
    "train = train.drop(['ID','DOB','LoggedIn'],axis = 1)\n",
    "X_train = np.array(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "#准备交叉验证\n",
    "kfold = StratifiedKFold(n_splits = 5,shuffle = True,random_state = 3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# n_estimators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "def modelfit(alg, X_train, y_train, cv_folds=None, early_stopping_rounds=10):\n",
    "    xgb_param = alg.get_xgb_params()\n",
    "    #xgb_param['num_class'] = 2\n",
    "    \n",
    "    xgtrain = xgb.DMatrix(X_train, label = y_train)\n",
    "        \n",
    "    cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], folds =cv_folds,metrics='logloss', early_stopping_rounds=early_stopping_rounds)\n",
    "  \n",
    "    cvresult.to_csv('1_nestimators.csv', index_label = 'n_estimators')\n",
    "    \n",
    "    #最佳参数n_estimators\n",
    "    n_estimators = cvresult.shape[0]\n",
    "    print(n_estimators)\n",
    "    # 训练模型\n",
    "    alg.set_params(n_estimators = n_estimators)\n",
    "    alg.fit(X_train, y_train, eval_metric='logloss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "139\n"
     ]
    }
   ],
   "source": [
    "xgb1 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=1000,\n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.3,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel=0.7,\n",
    "        objective= 'binary:logistic',\n",
    "        seed=3)\n",
    "\n",
    "modelfit(xgb1, X_train, y_train, cv_folds = kfold)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/chsl/.local/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: from_csv is deprecated. Please use read_csv(...) instead. Note that some of the default arguments are different, so please refer to the documentation for from_csv when changing your function calls\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAIABJREFUeJzt3XmcHXWd7vHPc3rN0lm7s3bIAmEJW8AYwBkVt8uiBlRAGHAEUcaZ4aqjMw7OeLleZpyrjqM4I9cRHcUNGURxIqCALC6jQAJEMEAgCQnZSDr73p3u/t4/qro9abqT7qSr63TO83696pVTv/qdqufUSZ/vqeVUKSIwMzMDKOQdwMzMSoeLgpmZdXJRMDOzTi4KZmbWyUXBzMw6uSiYmVknFwWzIpL+TtLX885hlhcXhUFG0nBJKyRdXtRWJ+klSRcVtc2RdJekLZK2SnpG0qcljU6nXympTdLOdFgu6c8zzn62pNVZLqMvussTEf8UEe/PaHkrJL05i3lnYaDer8G2Xo50LgqDTETsBP4MuFFSQ9r8OWBhRNwBIOk1wMPAfwPHR8Qo4FygFTi1aHa/jYjhETEceBfwOUmnDcwrsb6QVJl3BisTEeFhEA7ALcD3gbOBTcCEomm/Bv7tIM+/Evh1l7bHgD8pGp8HLAa2khSZE4qmnZC2bU37zCuadj7wDLADWAP8NTAM2AO0AzvTYVIPr+sm4O70+Y8CR/difRwP3A9sBpYAlxxKHuBTwHfT500DArgKWAVsAT4IvBp4Kn3tXy5aztHAg+n7sRH4HjAqnfaddFl70mV9vBfreAXwt+mymoHKdHxN+lqWAG/qZl2cAbwMVBS1vQN4Kn08F1gIbAfWA1/oYZ2eDazuYdpI4NtAE7AS+CRQSKdVAP+SroMXgWvT9VjZw7xWAG/uYdoHgKXp+zq/4/8MIOCLwIb0dTwNnNTT+5333+tgGnIP4OEQ3zgYDaxL//CuKmofBrQBZx/k+VdSVBTSD7qtwLHp+LHALuAtQBXw8fSPszodXwr8XTr+xvQP8Lj0ueuA1xblPD193OOHTFGOW0g+VOemH4LfA247yHOGkXxoX5U+57R0vczqax66Lwr/DtQC/wPYC/wYGAdMTj+UXp/2PyZdXzVAA/BL4Maiee/34XegdVzUfxEwBRgCHJe+zklF+botmMAy4C1F4z8Arksf/xZ4T/p4OHBmD/Po8f0iKQj/BdSlOZ4Hrk6nfZDkQ7kxXd8/5xCKQvr/aiNwerpO/w34ZTrtHOBxYBRJgTgBmHig99tD7wbvPhqkImILyTfMocCPiiaNJtkt+HJHg6TPpccVdkn6ZFHfM9P2HSRbCd8BXkinvRu4OyLuj4h9wOdJPpheA5xJ8mHymYhoiYgHgbuAy9Ln7gNmSRoREVsi4ok+vrw7I+KxiGglKQqzD9L/bcCKiPhmRLRGxJPAD4GL+ynPP0TE3oi4j+RD/PsRsSEi1gC/IilCRMTSdH01R0QT8AXg9QeY74HWcYd/jYhVEbGHpNjXpK+lKiJWRMSyHub9fdL3Q1Idybfn7xetj2Mk1UfEzoh4pC8rQ1IFcCnwiYjYERErSLYM3pN2uQT4UkSsTv+ffqYv8y9yOfCNiHgiIpqBTwBnSZqWvoY6ki1ERcSzEbGu6PUdzvtd1lwUBilJV5B8Q/s58NmiSVtIdlNM7GiIiI9HclzhTpJv0h0eiYhREVEHTABOBP4pnTaJZLdAxzzaSb6lTk6nrUrbOqxMp0FyfOJ8YKWkX0g6q48v7+Wix7tJCtCBTAXOSAvcVklbST5QJvRTnvVFj/d0Mz4cQNJ4SbdJWiNpO/BdoP4A8z3QOu6wqmj6UuAjJFszG9JlTeph3rcC75RUA7wTeCIiOpZ1NclWynOSFkh62wEydqeeZMtmZVFb8fs/qTh3l8d90XX97CTZipycfhH5Msmuxg2SbpY0Iu16uO93WXNRGIQkjSPZn/oBkoPOl0h6LUBE7CLZD//OvswzItaTfLt+e9q0luTDtmOZItmNsSadNkVS8f+fo9JpRMSCiLiAZBfLj4HbOxbTl0x9sAr4RVrgOobhEfHnA5znn9J5nhwRI4ArSHZtdOi6vAOt426fExG3RsQfp88L9v9CUNzvGZIP1POAPyEpEh3TXoiIy0jWx2eBOyQN6/3LZCPJt/GpRW2d7z/J7pvGomlT+jDvYl3XzzBgLH/4f/avEfEqYBZJkfubtL2n99t6wUVhcPoy8OOIeCjdZP448LX0WyHp+PskXZcWECQ1AtN7mqGksSQHIxenTbcDb5X0JklVwMdIDnb+hqTo7AY+LqlK0tkkxeQ2SdWSLpc0Mt0lsp1kywWSb9hjJY3sp/XQ4S7gWEnvSfNUSXq1pBMGOE8dyUHkbZImk35IFVkPzCgaP9A6fgVJx0l6Y/o+7+UPB8p7civwYeB1JMcUOuZzhaSGdMtka9rc43wk1RYPad/bgU+np0NPBT5KsmXU8bo+LGmypFEkB8cPpqrLcipJdnddJWl2+pr/CXg0Ilak7+8Z6Xrbla6P9oO839YbeR/U8NC3AbiQ5BvUqC7tDwKfLho/A7iH5I9+K/B74NPA2HT6lST7qDvOvNlA8kc4rmge7yA5YLgN+AVwYtG0E9O2bWmfd6Tt1cDPSHZjbQcWAH9c9LxvkOwC2ErPZx/9Y9H42Rzk4HTa7ziSM5aa0vk/SHIsok956P5Ac2VR/9UUHcQn+SD8ZNE6eTxdn4tIPuRXF/W9AHgpXdZf92Idr2D/A9OnkBz72UFyNs5d3a3Dov5HkXwg3t2l/bvp+72T5EvAhT08/+z09XcdjiE5dvXddH2vAq7nD2cfVZJsyW4iOfvor0i2LNTDclZ0s4x/TKd9kOSgecfrbUzb30RyVtZO/nCm1/CDvd8eDj4oXcFmZpmQdB7w7xEx9aCdLXfefWRm/UrSEEnnS6pMd6P9b5KTHGwQ8JaCDQrpgfSfdjctkl9kW4mQNJRkV9jxJMc97gY+HBHbcw1mveKiYGZmnbz7yMzMOg26i2zV19fHtGnT8o5hZjaoPP744xsjouFg/QZdUZg2bRoLFy7MO4aZ2aAiaeXBe3n3kZmZFXFRMDOzTi4KZmbWyUXBzMw6uSiYmVknFwUzM+uUaVGQdK6kJZKWSrquhz6XSHpG0mJJt3bXx8zMBkZmv1NIb9l3E8n9Z1cDCyTNj+TmHx19ZpLcYu+PImJLx7X/zcwsH1luKcwFlkbE8ohoAW4juZ58sQ8AN0VyH1ciYkNWYe7/1Dk8df1s2lpbs1qEmdmgl2VRmMz+92Zdzf73noXkFnrHSvpvSY9IOre7GUm6RtJCSQubmpoOKUwQnFJ4ka2bXj54ZzOzMpX3geZKYCbJHZ4uI7ml5KiunSLi5oiYExFzGhoOeumObo2d+24Atm9cc5CeZmblK8uisIb9b9jdyP43JIdk62F+ROyLiBeB50mKRL+rHTURgF2bvaVgZtaTLIvCAmCmpOmSqoFLgfld+vyYZCsBSfUku5OWZxFm+JgJAOzdtj6L2ZuZHREyKwoR0QpcC9wLPAvcHhGLJd0gaV7a7V5gk6RngIeAv4mITVnkGVk/CYDW7ZkdyzYzG/QyvXR2RNwD3NOl7fqixwF8NB0yNWJ0A/uigtjpomBm1pO8DzQPmEJFBVs1goo9G/OOYmZWssqmKABsrxhFdfPmvGOYmZWssioKu6rGMLQlk0MWZmZHhLIqCi3VYxjetjXvGGZmJausikLrkHpGtW/LO4aZWckqq6IQwxoYqmZ273RhMDPrTlkVhYq65CKsWzaszTmJmVlpKquiUDNyPAA7NrsomJl1p6yKwtDR6aUutvhSF2Zm3SmrolCXXuqiZbuLgplZd8qqKIyqT66U2rbDRcHMrDtlVRRqhwxjRwxBu3ypCzOz7pRVUQDYWhhN1V7/qtnMrDtlVxR2Vo6ittlFwcysO2VXFPZWj2FY65a8Y5iZlaSyKwottWMZ0e7rH5mZdafsikL70AZGxQ7aWlvzjmJmVnLKrigUhjdQULBl47q8o5iZlZyyKwqVdemlLjb5UhdmZl2VXVHYtuA2AHa6KJiZvULZFYXj3vMFAPZsWp1zEjOz0lN2RWHshKkAtG1bk3MSM7PSU3ZFoXbocLZQR2GHDzSbmXWVaVGQdK6kJZKWSrqum+lXSmqStCgd3p9lng5bCmOp2eOL4pmZdVWZ1YwlVQA3AW8BVgMLJM2PiGe6dP3PiLg2qxzd2VHdwPDmDQO5SDOzQSHLLYW5wNKIWB4RLcBtwAUZLq/XmodOYFSbr39kZtZVlkVhMrCqaHx12tbVuyQ9JekOSVMyzNOpbfhExsQ2Wpr3DsTizMwGjbwPNP8EmBYRpwD3A9/qrpOkayQtlLSwqanpsBdaMXISBQWbXl552PMyMzuSZFkU1gDF3/wb07ZOEbEpIprT0a8Dr+puRhFxc0TMiYg5DQ0Nhx2sdkwjANvWv3TY8zIzO5JkWRQWADMlTZdUDVwKzC/uIGli0eg84NkM83SqG3cUALs2+gdsZmbFMjv7KCJaJV0L3AtUAN+IiMWSbgAWRsR84EOS5gGtwGbgyqzyFBszYRoA+7asOnBHM7Myk1lRAIiIe4B7urRdX/T4E8AnsszQnRGjG9gbVbDd1z8yMyuW94HmXKhQYGOhnqrd/gGbmVmxsiwKANuq6hmy1z9gMzMrVrZFYU/teEa1Hv7prWZmR5KyLQqtw8Yztn0z0d6edxQzs5JRtkWBEZOp0T62bvJxBTOzDmVbFKpHJ1fc2LzuxZyTmJmVjrItCsPqkx9b79zoXzWbmXUo26IwKv0B2/pffiPfIGZmJSTTH6+VsrHjp9AaBSpozTuKmVnJKNsthcqqajYU6hkxckzeUczMSkbZFgWALVUTGL7Hl7owM+tQ1kVh99DJjNn3ct4xzMxKRlkXhba6Rhpis+/AZmaWKuuiUDF2KgUFTWuW5R3FzKwklHVRGDZuBgBb1izNOYmZWWko66IwatIxAOze4F81m5lBmReFcZOn0xoF2raszDuKmVlJKOuiUFlVTZPGUrnD92o2M4MyLwoAm6snMGy3f6tgZgYuCuwe4t8qmJl1KPui0DaikYbY5N8qmJnhokDFmKlUKGha4zOQzMzKvigM7fitwtoXck5iZpa/si8Ko9PfKuzxbxXMzLItCpLOlbRE0lJJ1x2g37skhaQ5WebpTsPk6bSFaNvs3yqYmWVWFCRVADcB5wGzgMskzeqmXx3wYeDRrLIcSFV1DRsYTdtLuSzezKykZLmlMBdYGhHLI6IFuA24oJt+/wB8Fsjt9J+mGMVoduS1eDOzkpFlUZgMrCoaX522dZJ0OjAlIu7OMMdB7a0/kYmFLXlGMDMrCbkdaJZUAL4AfKwXfa+RtFDSwqampn7P0j56BqPZzrYtG/t93mZmg0mWRWENMKVovDFt61AHnAQ8LGkFcCYwv7uDzRFxc0TMiYg5DQ0N/R60ZvyxAKx/8Zl+n7eZ2WCSZVFYAMyUNF1SNXApML9jYkRsi4j6iJgWEdOAR4B5EbEww0zdGj3leAC2r31uoBdtZlZSMisKEdEKXAvcCzwL3B4RiyXdIGleVss9FBOmJkVh3wbfbMfMyltlljOPiHuAe7q0Xd9D37OzzHIgtUOH8zL1VG7zD9jMrLyV/S+aO2ysaaRu10t5xzAzy5WLQmrX8KmMb11z8I5mZkcwF4VUjJ7BaHawbXP/n/JqZjZYuCikasfPBGD9isU5JzEzy4+LQqrztNQ1S3JOYmaWHxeF1Pipx9Me8mmpZlbWXBRStUOGsV71VG1bnncUM7PcHLQoSDpaUk36+GxJH5I0KvtoA29D+wiGb/XuIzMrX73ZUvgh0CbpGOBmkusZ3ZppqpxsYziTtJFob887iplZLnpTFNrTS1a8A/i3iPgbYGK2sfJRc+L5jNAemtb5R2xmVp56UxT2SboMeC9wV9pWlV2k/AxvPAmA9cuezDmJmVk+elMUrgLOAj4dES9Kmg58J9tY+ZhwzGwAdq32bxXMrDwd9IJ4EfEM8CEASaOBuoj4bNbB8jB23GS2UIc2+mCzmZWn3px99LCkEZLGAE8AX5P0heyj5UBiXfVURuxYlncSM7Nc9Gb30ciI2A68E/h2RJwBvDnbWPnZUXcMk/at8BlIZlaWelMUKiVNBC7hDweaj1hRfxwj2cWm9avzjmJmNuB6UxRuILl72rKIWCBpBvBCtrHyU9d4IgAvL1uUcxIzs4F30KIQET+IiFMi4s/T8eUR8a7so+VjfHoG0s5Vv885iZnZwOvNgeZGSXdK2pAOP5TUOBDh8jB2/BS2McxnIJlZWerN7qNvAvOBSenwk7TtiKRCgbVVU6nb7jOQzKz89KYoNETENyOiNR1uARoyzpWrjc2VTGx50WcgmVnZ6U1R2CTpCkkV6XAFsCnrYHnaq1pGa6fPQDKzstObovA+ktNRXwbWARcBV2aYKXeTz/kIAGufeyznJGZmA6s3Zx+tjIh5EdEQEeMi4kLgiD37CKBx1hkA7HrJF8Yzs/JyqHde+2hvOkk6V9ISSUslXdfN9A9KelrSIkm/ljTrEPP0qxGj6lmr8VQ3+bRUMysvh1oUdNAOUgVwE3AeMAu4rJsP/Vsj4uSImA18DiiZayqtHzqTcbuezzuGmdmAOtSiEL3oMxdYmv7YrQW4Dbhgv5kk11TqMKyX8x0QzfUnMbl9HTu3b8k7ipnZgOmxKEjaIWl7N8MOkt8rHMxkYFXR+Oq0rety/lLSMpIthQ/1kOUaSQslLWxqaurFog9f7VGnUVCw6tkFA7I8M7NS0GNRiIi6iBjRzVAXEQe9D0NvRcRNEXE08LfAJ3voc3NEzImIOQ0NA/MTiUnHzQVg24tPDMjyzMxKwaHuPuqNNcCUovHGtK0ntwEXZpinTxomTWMLIyisfyrvKGZmAybLorAAmClpuqRq4FKSy2V0kjSzaPStlNDVV1UosKbmaMZsfy7vKGZmAyazohARrcC1JJfdfha4PSIWS7pB0ry027WSFktaRHKa63uzynModo6ZxVGtK9nX0px3FDOzAdFvxwa6ExH3APd0abu+6PGHs1z+4aqafBrV677Hw595G2dff3/ecczMMtebS2d3dxbSqvRy2jMGImRexh37agCa27Pcy2ZmVjp6s6VwI8nppLeS/GjtUuBo4AngG8DZWYXLW+MxJ7OdYYwec0RfFNbMrFNvvgLPi4ivRsSOiNgeETcD50TEfwKjM86XKxUqWFl7PPVbn847ipnZgOhNUdgt6RJJhXS4BNibTiuZXyBnZVf9bKa2rWTXjq15RzEzy1xvisLlwHuADenwHuAKSUNIzi46og2ZcSYVClY8/Zu8o5iZZa43l85eHhFvj4j6dHh7RCyNiD0R8euBCJmnqSe/FoDtS3+bcxIzs+z15uyjxvRMow3p8ENJjQMRrhSMapjIGk2gZr3vrWBmR77e7D76JskvkSelw0/StrKxru4kGnctJuKIP4RiZmWuN0WhISK+GRGt6XALUFbnaLZNPJ1xbGb96uV5RzEzy1RvisImSVdIqkiHK4BNWQcrJaOPfQ0Aaxf/KuckZmbZ6k1ReB9wCfAysA64CLgyw0wlZ9qJZ9IcVWz77S15RzEzy1Rvzj5aGRHzIqIhIsZFxIXAuwYgW8morh3C0ppZHFW5Le8oZmaZOtSL+ny0X1MMAjsmnsXRbcvZsvHlvKOYmWXmUIuC+jXFIDDqxLcA8OLCe3NOYmaWnUMtCmV3bubRs1/L7qihZenDeUcxM8tMj1dJlbSD7j/8BQzJLFGJqqqu4dkhpzBh84K8o5iZZabHLYWIqIuIEd0MdRGR6c15StXuSWcxrX0VG9e9lHcUM7NM+O4xfVB/cnJcYcXjPq5gZkcmF4U+mH7ya9geQ9jz2LfzjmJmlgkXhT6oqKzkuZjKTK0i2tvzjmNm1u9cFPpIp17GBG3hxecW5h3FzKzfuSj00bSzLgTg5YU/yTmJmVn/c1Hoo4ZJ01hWMYNRqx7KO4qZWb9zUTgETRNex7Eti9m2eWPeUczM+lWmRUHSuZKWSFoq6bpupn9U0jOSnpL0gKSpWebpL6NOfRuVamfpI/PzjmJm1q8yKwqSKoCbgPOAWcBlkmZ16fYkMCciTgHuAD6XVZ7+NPP0N7A1hrHnkW/kHcXMrF9luaUwF1gaEcsjogW4DbiguENEPBQRu9PRR4BBce/nispKno1pnKAVtLW25h3HzKzfZFkUJgOrisZXp209uRr4aXcTJF0jaaGkhU1NTf0Y8dDtLNQxVjt4bsHP845iZtZvSuJAc3qLzznAP3c3PSJujog5ETGnoaE0bg991ke/T0tUsuPJO/OOYmbWb7K8sN0aYErReGPath9Jbwb+Hnh9RDRnmKdfDR8xhqeGns5RTQ8S7e2oUBL11czssGT5SbYAmClpuqRq4FJgv9N1JJ0GfBWYFxEbMsySieZj3sqk2MCy3z+SdxQzs36RWVGIiFbgWuBe4Fng9ohYLOkGSfPSbv8MDAd+IGmRpEF1jufRr72YthBNj/0w7yhmZv0i0/siRMQ9wD1d2q4vevzmLJeftTHjJvP7mMb4l+4G/iXvOGZmh807wg/TOjUwo7COlUsW5R3FzOywuSgcphGVrbSHWPur7+QdxczssLkoHKYz/tdDPFM7mylr7vI9Fsxs0HNR6Ad7jn8njfEyS554OO8oZmaHxUWhHxz3hstpjiq2Pvq9vKOYmR0WF4V+MGLUWBbXncXMpvvZt68l7zhmZofMRaGf6JRLGMs2Hv702/OOYmZ2yFwU+smJr7+YzVHHyPateUcxMztkLgr9pLqmlucnXcjpep4Na1bkHcfM7JC4KPSjxjf9GZVqZ9l9/553FDOzQ+Ki0I8ajzmZxTWzmbbyDtra2vKOY2bWZy4K/az51PcykSYe/Ifz845iZtZnLgr97KQ3/QkbYwRj2jfnHcXMrM9cFPpZdU0tS6dfzqsKz7N88WN5xzEz6xMXhQyc8PaPsDtq2HifL6dtZoOLi0IGRo6dwNPj3s7srfezfs3yvOOYmfWai0JGppz/N1TQzuKb3593FDOzXnNRyMik6cfzWMziDH7PxvWr845jZtYrLgoZarzi/1FLCy/86B/zjmJm1isuChmacuxsnhh9Dqe9fIcvfWFmg4KLQsYmX/ApKmhn+Z2fyjuKmdlBuShkbNL0E3gkTuJVTfNZuWRR3nHMzA7IRWEAzPrLW9mrarbd+THfx9nMSpqLwgAYO76Rxcf+JafsXciiB27LO46ZWY8yLQqSzpW0RNJSSdd1M/11kp6Q1Crpoiyz5O1VF32clYUpjPvNp9i7e2fecczMupVZUZBUAdwEnAfMAi6TNKtLt5eAK4Fbs8pRKqqqa1gak5gc63nyO3+bdxwzs25luaUwF1gaEcsjogW4DbiguENErIiIp4Cy2NH+pv99H4+NeTtz136PJY8/lHccM7NXyLIoTAZWFY2vTtv6TNI1khZKWtjU1NQv4fJywp9+iY0aQ83d/5O9e3bnHcfMbD+D4kBzRNwcEXMiYk5DQ0PecQ5L3aixbDj7c0xrX8VvPvPWvOOYme0ny6KwBphSNN6YtpW9k8++iMfGXsAb9QRP//LHeccxM+uUZVFYAMyUNF1SNXApMD/D5Q0qp1x9Ey8VGpnw4EfYvGFt3nHMzIAMi0JEtALXAvcCzwK3R8RiSTdImgcg6dWSVgMXA1+VtDirPKWmdmgdre/4GiNjByu+PI+21ta8I5mZoYjIO0OfzJkzJxYuXJh3jH7z2A8+z9zF/8BvJl/Faz5wY95xzOwIJenxiJhzsH6D4kDzkWzuRR9jwei38po13+Txn30n7zhmVuZcFPImccqffY0l7VOY9duPsvi3P807kZmVMReFElBTO4xx195LU0UDR/3sKl5Y9Ku8I5lZmXJRKBGjx02m9n13sYMh1N95qQuDmeXCRaGEjGucAe+9m72FIUy482KeefS+vCOZWZlxUSgxk2bMonD1z9hCHdPuuZynfnFn3pHMrIy4KJSg8Y3HMOwvHmR95SSOf/D9PHHfd/OOZGZlwkWhRI0dP4Wxf3E/K6pmcMp//0/u/dQ5eUcyszLgolDCRowdx+QP3cezQ07nHB7hkX+7ipbm5rxjmdkRzEWhxA0bMZpZf30vj0y8gjM3/Yiln341a5Y/l3csMztCuSgMAhWVlZz5ZzfxxNwvMqWwgRHfOpuFP/lq3rHM7AjkojCInH7++9h51S9YFeOY8/jHWfDFS9ixbXPesczsCOKiMMhMnHocx/6vhTwy5QOcvvU+dtx4pk9bNbN+46IwCFVWVXPm1Z/nhbf+J21UcMpDV7LgX97lYw1mdthcFAax4+eew7i/TbYaTt3+MOO+9RoevfFPWL3smbyjmdkg5aIwyNXUDuPMqz/P9msW8sS4dzJ7y31M+PYf8eiNl7JyyaK845nZIOOb7BxhNq5bwbI7P82p6++kVvt4quZVtM75AKe+4WIqKivzjmdmOentTXZcFI5Qm9ev4vmf3sSMF29lnLaxRuNZOf3dTHv9e5g09di845nZAHNRMABaW5p5+oFbqfrtjZxUWAHAs1UnsHX62zn69VcwbvLUfAOa2YBwUbBXWLtsMS/9+nuMf+lupretoD3E89HI5slvoG7WW5g5503UDhmWd0wzy4CLgh3QS0ueZO1vbmPk2l9xTMtzVKmNPVHNC7Uns6vxj6k/+S1MPeHVVNfU5h3VzPqBi4L12q7tW1i24F52L3mACRt/y7T2VQC0RCUrK6ezeeQsYtKpjJo6m4nHzGbk6LE5JzazvnJRsEO2ce0KVi76OftWPcHwzYs5qvl5RrC7c/qGGMmGmunsHDGDqD+WYZNPZGzjcYxvnEFlVVWOyc2sJ70tCj5H0V6hftI06ie9v3M82ttYu2IJTcsXsWfts1RsfoHhmxdzYvMS6jb+CNIfUrdEBStjLFuHHsXeIRNoG9pAoW48VSMnMmTMJOrqJzN6XCPDho9AUk6vzswOJNOiIOlc4EtABfD1iPhMl+k1wLeBVwGbgHdHxIosM1nfqVDBpBmzmDRj1n7t0d7OxvUvsX7Z0+xe/wKSCMrDAAALSUlEQVRtm16kevtKRu5ZzeS9Sxm9eRsVeuWW6K6oZUthNNsrx7CneiytNaNpqxkJQ0ZRGDqKqmFjqB4+htq6MVQPqaNmaB1Dho1gyLARVNfUDNTLNitLmRUFSRXATcBbgNXAAknzI6L4GgxXA1si4hhJlwKfBd6dVSbrXyoUqJ84jfqJ07qd3t7ayuZN69jWtIadm9bQvGUdrdvXo53rqdzTRG3zRhr2LGf4ru3UxS6q1HbQZbZEBXtUy15q2ashtBRqaSkMYV/FENoqh9BWMYSoqCEqqohCMqiimihUQmU1hYoqqEjaVFmNKqsopI8rKqsoVNZQqKymoqqaiqoaChXVVFZXU1FZTaGikkJFBYVCJYVCAVVUUCik4xUFCoUKKiorO6dXVFSggi8aYINLllsKc4GlEbEcQNJtwAVAcVG4APhU+vgO4MuSFIPtQId1q1BZyZjxUxgzfspB+0Z7O3t272DH1o3s3raRPds30bJzC63Nu2jfu4P25l3Evt3Qsgvt201h3y4qWndT2baHyrY9DG/dTHXLHmra91JJK5W0UhXJv9W9KDZZaQ/RRoF2CrQj2inQRoGQCJIBINJpHY+L25PHIoBQ8ri9sy3tp/37dUwLlD6nJ91Pix7ae9Jzf3W7mP6af3G7emjvLk7Xhp4/cHrOedDX3KWpr8vobv775v45p735sh7n1B+yLAqTgVVF46uBM3rqExGtkrYBY4GNxZ0kXQNcA3DUUUdllddypEKBIcNHMmT4SGg8un9nHkFb6z727WtmX0szrftaaGtpZt++5HH7vmZaW1to29dCe2sLrfuaidYW2lv30d7aTHvbPqK1hWhvg/Y2Ito7H5M+VkdbtEFE57RkvLgtaVe0J9PTfMlHRiTtBIpIP0XSafv1Sf8tbo/oKBudz0umt/d5damHjy9FdDulp/49fQwq/c7XdeqBPn67nU9Re3EP9fCdsmvO6Gzv3fL2m1eP31t7XnbX5QQHKkk9lJz27L/gDIoDzRFxM3AzJGcf5RzHBhsp3R1UTe3QurzTmJW0LHd4rgGK9xs0pm3d9pFUCYwkOeBsZmY5yLIoLABmSpouqRq4FJjfpc984L3p44uAB308wcwsP5ntPkqPEVwL3EtySuo3ImKxpBuAhRExH/gP4DuSlgKbSQqHmZnlJNNjChFxD3BPl7brix7vBS7OMoOZmfWeT6I2M7NOLgpmZtbJRcHMzDq5KJiZWadBd+lsSU3AykN8ej1dfi1d4gZbXhh8mZ03W86brb7knRoRDQfrNOiKwuGQtLA31xMvFYMtLwy+zM6bLefNVhZ5vfvIzMw6uSiYmVmncisKN+cdoI8GW14YfJmdN1vOm61+z1tWxxTMzOzAym1LwczMDsBFwczMOpVNUZB0rqQlkpZKui7vPF1JmiLpIUnPSFos6cNp+xhJ90t6If13dN5Zi0mqkPSkpLvS8emSHk3X83+ml00vCZJGSbpD0nOSnpV0VimvX0l/lf5f+L2k70uqLbX1K+kbkjZI+n1RW7frVIl/TbM/Jen0Esn7z+n/iack3SlpVNG0T6R5l0g6pxTyFk37mKSQVJ+O98v6LYuiIKkCuAk4D5gFXCZpVr6pXqEV+FhEzALOBP4yzXgd8EBEzAQeSMdLyYeBZ4vGPwt8MSKOAbYAV+eSqntfAn4WEccDp5LkLsn1K2ky8CFgTkScRHL5+UspvfV7C3Bul7ae1ul5wMx0uAb4ygBlLHYLr8x7P3BSRJwCPA98AiD9+7sUODF9zv9LP0sG0i28Mi+SpgD/A3ipqLlf1m9ZFAVgLrA0IpZHRAtwG3BBzpn2ExHrIuKJ9PEOkg+sySQ5v5V2+xZwYT4JX0lSI/BW4OvpuIA3AnekXUomr6SRwOtI7uFBRLRExFZKeP2SXNp+SHpXwqHAOkps/UbEL0nuhVKsp3V6AfDtSDwCjJI0cWCSJrrLGxH3RURrOvoIyV0iIcl7W0Q0R8SLwFKSz5IB08P6Bfgi8HH2v8Fzv6zfcikKk4FVReOr07aSJGkacBrwKDA+Italk14GxucUqzs3kvzH7Lg7/Fhga9EfWCmt5+lAE/DNdHfX1yUNo0TXb0SsAT5P8k1wHbANeJzSXb/Felqng+Hv8H3AT9PHJZlX0gXAmoj4XZdJ/ZK3XIrCoCFpOPBD4CMRsb14Wnqr0pI4h1jS24ANEfF43ll6qRI4HfhKRJwG7KLLrqISW7+jSb75TQcmAcPoZjdCqSuldXowkv6eZDfu9/LO0hNJQ4G/A64/WN9DVS5FYQ0wpWi8MW0rKZKqSArC9yLiR2nz+o5NwPTfDXnl6+KPgHmSVpDsjnsjyT77UenuDiit9bwaWB0Rj6bjd5AUiVJdv28GXoyIpojYB/yIZJ2X6vot1tM6Ldm/Q0lXAm8DLi+6T3wp5j2a5IvC79K/vUbgCUkT6Ke85VIUFgAz0zM3qkkOHs3POdN+0v3x/wE8GxFfKJo0H3hv+vi9wH8NdLbuRMQnIqIxIqaRrM8HI+Jy4CHgorRbKeV9GVgl6bi06U3AM5To+iXZbXSmpKHp/42OvCW5frvoaZ3OB/40PUvmTGBb0W6m3Eg6l2Q36LyI2F00aT5wqaQaSdNJDuA+lkfGDhHxdESMi4hp6d/eauD09P93/6zfiCiLATif5MyCZcDf552nm3x/TLKZ/RSwKB3OJ9lP/wDwAvBzYEzeWbvJfjZwV/p4BskfzlLgB0BN3vmKcs4GFqbr+MfA6FJev8D/AZ4Dfg98B6gptfULfJ/kmMe+9APq6p7WKSCSswCXAU+TnFlVCnmXkuyL7/i7+/ei/n+f5l0CnFcKebtMXwHU9+f69WUuzMysU7nsPjIzs15wUTAzs04uCmZm1slFwczMOrkomJlZJxcFMzPr5KJg1guSZks6v2h8nvrpEuySPpJevsAsd/6dglkvpJdBmBMR12Yw7xXpvDf24TkVEdHW31nMvKVgRxRJ05TcQOdr6Q1q7pM0pIe+R0v6maTHJf1K0vFp+8VKbmzzO0m/TC+NcgPwbkmLJL1b0pWSvpz2v0XSVyQ9Imm5pLPTm6M8K+mWouV9RdLCNNf/Sds+RHLBu4ckPZS2XSbp6TTDZ4uev1PSv0j6HXCWpM8ouSnTU5I+n80atbKT50/kPXjo7wGYRnKly9np+O3AFT30fQCYmT4+g+T6TZBcImBy+nhU+u+VwJeLnts5TnIjlNtILjNwAbAdOJnkS9fjRVk6LvdQATwMnJKOr+APlyqYRHLdowaSK7s+CFyYTgvgkvTxWJJLL6g4pwcPhzt4S8GORC9GxKL08eMkhWI/6SXKXwP8QNIi4KtAxw1J/hu4RdIHSD7Ae+MnEREkBWV9JBcuawcWFy3/EklPAE+S3M2ru7v/vRp4OJKro3Zcxvl16bQ2kqvoQnJ/hb3Af0h6J7D7FXMyOwSVB+9iNug0Fz1uA7rbfVQguWHN7K4TIuKDks4guavc45Je1YdltndZfjtQmV5l86+BV0fElnS3Um0v5ltsb6THESKiVdJckqunXgRcS3L5crPD4i0FK0uR3MDoRUkXQ+dNz09NHx8dEY9GxPUkd2ubAuwA6g5jkSNIbuyzTdJ4kvvpdiie92PA6yXVp/cDvgz4RdeZpVs6IyPiHuCvSO45bXbYvKVg5exy4CuSPglUkRwX+B3wz5JmkhwjeCBtewm4Lt3V9H/7uqCI+J2kJ0kuhb2KZBdVh5uBn0laGxFvSE91fShd/t0R0d09E+qA/5JUm/b7aF8zmXXHp6SamVkn7z4yM7NO3n1kRzxJN5Hc37jYlyLim3nkMStl3n1kZmadvPvIzMw6uSiYmVknFwUzM+vkomBmZp3+Pyl1TbeQI/w9AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "cvresult = pd.DataFrame.from_csv('1_nestimators.csv')\n",
    "        \n",
    "# plot\n",
    "test_means = cvresult['test-logloss-mean']\n",
    "test_stds = cvresult['test-logloss-std'] \n",
    "        \n",
    "train_means = cvresult['train-logloss-mean']\n",
    "train_stds = cvresult['train-logloss-std'] \n",
    "\n",
    "x_axis = range(0, cvresult.shape[0])\n",
    "        \n",
    "pyplot.errorbar(x_axis, test_means, yerr=test_stds ,label='Test')\n",
    "pyplot.errorbar(x_axis, train_means, yerr=train_stds ,label='Train')\n",
    "pyplot.title(\"XGBoost n_estimators vs Log Loss\")\n",
    "pyplot.xlabel( 'n_estimators' )\n",
    "pyplot.ylabel( 'Log Loss' )\n",
    "pyplot.savefig( 'n_estimators4_1.png' )\n",
    "\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# max_depth、min_child_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': [3, 5, 7, 9], 'min_child_weight': [1, 3, 5]}"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_depth = [3,5,7,9]\n",
    "min_child_weight = [1,3,5]\n",
    "param_test_1 = dict(max_depth=max_depth,min_child_weight = min_child_weight)\n",
    "param_test_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'max_depth': 9, 'min_child_weight': 1}, -9.264238929472062e-05)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_1 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=139,\n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.3,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'binary:logistic',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch_1 = GridSearchCV(xgb_1, param_grid = param_test_1, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch_1.fit(X_train, y_train)\n",
    "\n",
    "gsearch_1.best_params_,     gsearch_1.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': [8, 9, 10], 'min_child_weight': [1, 2]}"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_depth2 = [8,9,10]\n",
    "min_child_weight2 = [1,2]\n",
    "param_test_2 = dict(max_depth=max_depth2,min_child_weight = min_child_weight2)\n",
    "param_test_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'max_depth': 8, 'min_child_weight': 1}, -9.029462894268451e-05)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_2 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=139,\n",
    "        max_depth=9,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.3,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'binary:logistic',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch_2 = GridSearchCV(xgb_2, param_grid = param_test_2, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch_2.fit(X_train, y_train)\n",
    "\n",
    "gsearch_2.best_params_,     gsearch_2.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# colsample_bytree、subsample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'subsample': [0.3, 0.4, 0.5, 0.6, 0.7, 0.8],\n",
       " 'colsample_bytree': [0.6, 0.7, 0.8, 0.9]}"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "subsample = [i/10.0 for i in range(3,9)]\n",
    "colsample_bytree = [i/10.0 for i in range(6,10)]\n",
    "param_test_3 = dict(subsample=subsample, colsample_bytree=colsample_bytree)\n",
    "param_test_3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'colsample_bytree': 0.9, 'subsample': 0.8}, -3.6946927033779665e-05)"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_3 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=139,\n",
    "        max_depth=8,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.3,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'binary:logistic',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch_3 = GridSearchCV(xgb_3, param_grid = param_test_3, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch_3.fit(X_train , y_train)\n",
    "\n",
    "gsearch_3.best_params_,     gsearch_3.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# reg_lambda、reg_alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reg_alpha': [1, 1.5, 2], 'reg_lambda': [0.5, 1, 1.5, 2]}"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reg_alpha = [1, 1.5, 2] \n",
    "reg_lambda = [0.5, 1,1.5, 2]\n",
    "\n",
    "param_test_4 = dict(reg_alpha=reg_alpha, reg_lambda=reg_lambda)\n",
    "param_test_4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'reg_alpha': 1, 'reg_lambda': 0.5}, -5.091982633929888e-05)"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_4 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=139,\n",
    "        max_depth=8,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.8,\n",
    "        colsample_bytree=0.9,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'binary:logistic',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch_4 = GridSearchCV(xgb_4, param_grid = param_test_4, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch_4.fit(X_train , y_train)\n",
    "\n",
    "gsearch_4.best_params_,     gsearch_4.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# learning_rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'learning_rate': [0.05, 0.1, 0.15]}"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "learning_rate = [0.05, 0.1, 0.15] \n",
    "\n",
    "param_test_5 = dict(learning_rate=learning_rate)\n",
    "param_test_5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'learning_rate': 0.15}, -3.674715782773053e-05)"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_5 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=139,\n",
    "        max_depth=8,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.8,\n",
    "        colsample_bytree=0.9,\n",
    "        colsample_bylevel = 0.7,\n",
    "        reg_alpha = 1,\n",
    "        reg_lambda = 0.5,\n",
    "        objective= 'binary:logistic',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch_5 = GridSearchCV(xgb_5, param_grid = param_test_5, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch_5.fit(X_train , y_train)\n",
    "\n",
    "gsearch_5.best_params_,     gsearch_5.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "param = {'learning_rate':0.15, 'n_estimators':139, 'max_depth':8, 'min_child_weight':1, 'gamma':0, 'subsample':0.8, 'colsample_bytree':0.9, 'colsample_bylevel':0.7, 'reg_alpha':1, 'reg_lambda':0.5, 'objective':'binary:logistic' }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "dtrain = xgb.DMatrix(X_train,label = y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtrain.num_col()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "87019"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtrain.num_row()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[14:35:33] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 0 pruned nodes, max_depth=3\n",
      "[14:35:33] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 0 pruned nodes, max_depth=2\n"
     ]
    }
   ],
   "source": [
    "num_round = 2\n",
    "\n",
    "bst = xgb.train(param, dtrain, num_round)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train Accuary: 100.00%\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "train_preds = bst.predict(dtrain)\n",
    "train_predictions = [round(value) for value in train_preds]\n",
    "y_train = dtrain.get_label()\n",
    "train_accuracy = accuracy_score(y_train, train_predictions)\n",
    "print (\"Train Accuary: %.2f%%\" % (train_accuracy * 100.0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 给出特征重要性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAc8AAAEWCAYAAAAASRzMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAGFxJREFUeJzt3XvUXHV97/H3hyRKTBDK1cg9B61EboIKLpGVqLQgIlQ9rZaLKIqoR2SJtRyXpdqlq56eYmtPjwdBPCBar1iKCFpbeLygoIDcERSJDYpcFCSJOZjA9/wxOzp9fEJmJ888c8n7tdYsZvb+zd7fbzaZT/ae38ykqpAkSb3bbNAFSJI0agxPSZJaMjwlSWrJ8JQkqSXDU5KklgxPSZJaMjylHiU5K8lfDLoOSYMXP+epfkuyFNgBeLRr8dOr6qcbsc3FwCeqaqeNq240JTkPuLuq3j3oWqRNkWeemilHVtX8rtsGB+d0SDJ7kPvfGElmDboGaVNneGqgkhyU5FtJHkpyQ3NGuXbda5PclmR5kh8leWOzfB5wGfDUJCua21OTnJfkfV3PX5zk7q7HS5P8eZIbgZVJZjfPuzDJ/UnuSnLK49T6m+2v3XaSdya5L8k9SY5O8pIkdyT5RZJ3dT33PUk+n+QzTT/XJdm3a/2eSSaaP4dbkrxs0n7/T5JLk6wETgSOAd7Z9P7FZtzpSe5stn9rkj/q2sYJSb6Z5G+TPNj0enjX+q2T/N8kP23WX9S17qVJrm9q+1aSfXo+wNKYMjw1MEl2BL4EvA/YGngHcGGS7Zoh9wEvBZ4MvBb4uyT7V9VK4HDgpxtwJvtq4AhgK+Ax4IvADcCOwIuAU5P8YY/begqwefPcM4BzgGOBA4AXAH+RZPeu8UcBn2t6/SfgoiRzksxp6vhXYHvgrcAnk/x+13P/FHg/sAXwceCTwN80vR/ZjLmz2e+WwHuBTyRZ0LWNA4HbgW2BvwHOTZJm3QXAk4BnNjX8HUCSZwEfA94IbAN8BLg4yRN7/DOSxpLhqZlyUXPm8lDXWc2xwKVVdWlVPVZVXwWuAV4CUFVfqqo7q+NrdMLlBRtZxz9U1bKqWgU8B9iuqv6qqn5dVT+iE4Cv6nFbq4H3V9Vq4NN0QulDVbW8qm4BbgX27Rp/bVV9vhn/QTrBe1Bzmw98oKnjcuASOkG/1r9U1ZXNn9P/m6qYqvpcVf20GfMZ4AfAc7uG/LiqzqmqR4HzgQXADk3AHg6cXFUPVtXq5s8b4CTgI1V1dVU9WlXnA480NUubrJF930cj5+iq+rdJy3YF/muSI7uWzQGuAGguK/4l8HQ6/9B7EnDTRtaxbNL+n5rkoa5ls4Bv9LitnzdBBLCq+e+9XetX0QnF39l3VT3WXFJ+6tp1VfVY19gf0zmjnaruKSU5Hng7sFuzaD6dQF/rZ137/1Vz0jmfzpnwL6rqwSk2uyvwmiRv7Vr2hK66pU2S4alBWgZcUFVvmLyiuSx4IXA8nbOu1c0Z69rLjFNNE19JJ2DXesoUY7qftwy4q6qetiHFb4Cd195JshmwE7D2cvPOSTbrCtBdgDu6nju53//0OMmudM6aXwR8u6oeTXI9v/3zejzLgK2TbFVVD02x7v1V9f4etiNtMrxsq0H6BHBkkj9MMivJ5s1EnJ3onN08EbgfWNOchf5B13PvBbZJsmXXsuuBlzSTX54CnLqe/X8HWN5MIprb1LBXkudMW4f/2QFJXt7M9D2VzuXPq4CrgV/RmQA0p5k0dSSdS8Hrci+wsOvxPDqBej90JlsBe/VSVFXdQ2cC1oeT/F5TwyHN6nOAk5McmI55SY5IskWPPUtjyfDUwFTVMjqTaN5F50V/GfBnwGZVtRw4Bfgs8CCdCTMXdz33+8CngB8176M+lc6klxuApXTeH/3Mevb/KJ0JSfsBdwEPAB+lM+GmH/4F+BM6/RwHvLx5f/HXdMLy8KaGDwPHNz2uy7nAorXvIVfVrcCZwLfpBOvewJUtajuOznu436czUetUgKq6BngD8I9N3T8ETmixXWks+SUJ0gxI8h5gj6o6dtC1SNp4nnlKktSS4SlJUktetpUkqSXPPCVJamkkP+e51VZb1R577DHoMjbaypUrmTdv3qDLmBb2MnzGpQ+wl+lw7bXXPlBV261/pHoxkuG5ww47cM011wy6jI02MTHB4sWLB13GtLCX4TMufYC9TIckP57xnY4xL9tKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLRmekiS1ZHhKktSS4SlJUkuGpyRJLaWqBl1Da7ss3KM2++MPDbqMjXba3ms486bZgy5jWtjL8BmXPmC8ejnvsHksXrx4xveb5NqqevaM73hMeeYpSVJLhqckaZOXZPMk30lyQ5Jbkrz38cb3LTyTnJLktiSV5MYkNyX5VpJ9m/U7J7kiya1NoW/rVy2SJK3HI8ALq2pfYD/gsCQHrWtwP99EeDPwYmAX4LaqejDJ4cDZwIHAGuC0qrouyRbAtUm+WlW39rEmSZJ+R3UmAK1oHs5pbuucFNSXM88kZwELgcuAA6vqwWbVVcBOTaH3VNV1zf3lwG3Ajv2oR5Kk9UkyK8n1wH3AV6vq6nWO7dds2yRLgWdX1QNdy94BPKOqXj9p7G7A14G9qurhdWzvJOAkgG233e6AM/7+nL7UPZN2mAv3rhp0FdPDXobPuPQB49XL7lvOYv78+TO+3yVLljjbtkdJtgL+GXhrVd081ZgZm/udZAlwInDwpOXzgQuBU9cVnABVdTadS77ssnCPGodp6+M0/d5ehs+49AHj1cugPqqi3lXVQ0muAA4DpgzPGZltm2Qf4KPAUVX1867lc+gE5yer6gszUYskSZMl2a454yTJXOBQ4PvrGt/3f8ol2QX4AnBcVd3RtTzAuXQmE32w33VIkvQ4FgDnJ5lF58Tys1V1yboGz8R1kDOAbYAPd/KSNc119+cDxwE3NW/QAryrqi6dgZokSfqNqroReFav4/sWnlW1W3P39c1t8vpvAtmQbc+dM4vbP3DEhhc3JCYmJlh6zOJBlzEt7GX4jEsfMH69aPT5DUOSJLVkeEqS1JLhKUlSS4anJEktGZ6SJLVkeEqS1JLhKUlSS4anJEktGZ6SJLVkeEqS1JLhKUlSS4anJEktGZ6SJLVkeEqS1JLhKUlSS4anJEktGZ6SJLVkeEqS1JLhKUlSS4anJEktGZ6SJLVkeEqS1JLhKUlSS4anJEktGZ6SJLVkeEqS1JLhKUlSS4anJEktGZ6SJLXUOjyT/F6SffpRjCRJo6Cn8EwykeTJSbYGrgPOSfLB/pYmSdJw6vXMc8uqehh4OfDxqjoQeHH/ypIkaXj1Gp6zkywA/hi4pI/1SJI09HoNz78CvgLcWVXfTbIQ+EH/ypIkaXjN7mVQVX0O+FzX4x8Br+hXUZIkDbNeJww9Pcm/J7m5ebxPknf3tzRJkoZTr5dtzwH+O7AaoKpuBF7Vr6IkSRpmvYbnk6rqO5OWrZnuYiRJGgW9hucDSf4LUABJXgnc07eqJEkaYj1NGALeApwNPCPJT4C7gGP6VpUkSUNsveGZZDPg2VX14iTzgM2qann/S5MkaTit97JtVT0GvLO5v9LglCRt6np9z/Pfkrwjyc5Jtl5762tlkiQNqV7f8/yT5r9v6VpWwMLpLac3q1Y/ym6nf2kQu55Wp+29hhPGoA+wl2F03mHzBl2CNLZ6OvOsqt2nuA0kOCVp1C1btowlS5awaNEinvnMZ/KhD31o0CWppZ7OPJMcP9Xyqvr4huw0ySnAm4BnADcBAZYDb6qqGzZkm5I0KmbPns2ZZ57J/vvvz/LlyznggAM49NBDWbRo0aBLU496vWz7nK77mwMvovO7nhsUnsCb6fyk2S7AbVX1YJLD6Xwc5sAN3KYkjYQFCxawYMECALbYYgv23HNPfvKTnxieI6TXL4Z/a/fjJFsBn96QHSY5i857pZcBH6uqbzWrrgJ22pBtStKoWrp0Kd/73vc48EDPG0ZJqqr9k5I5wM1V9fsbtNNkKZ3Pjj7QtewdwDOq6vXreM5JwEkA22673QFn/P05G7LrobLDXLh31aCrmB72Mnx233IW8+fPH3QZ02LFihVj2cuqVat429vexrHHHsshhxzS1/0uWbLk2qp6dl93sgnp9T3PL9J8NR+dSUaL6PqJso2VZAlwInDwusZU1dl0Luuyy8I96syber3iPLxO23sN49AH2MswOu+weSxevHjQZUyLiYmJsetl9erVvPSlL+Xkk0/m7W9/+6DLUku9vkL8bdf9NcCPq+ru6SggyT7AR4HDq+rn07FNSRpmVcWJJ57InnvuaXCOqF6/JOElVfW15nZlVd2d5H9s7M6T7AJ8ATiuqu7Y2O1J0ii48sorueCCC7j88svZb7/92G+//bj00ksHXZZa6PXM81DgzyctO3yKZW2dAWwDfDgJwBqvyUsadwcffDAbMt9Ew+NxwzPJm+h8rGRhkhu7Vm0BXLmhO62q3Zq7r29ukiSNjPWdef4TnY+U/DVwetfy5VX1i75VtR5z58zi9g8cMajdT5uJiQmWHrN40GVMC3sZPhMTE4MuQRpbjxueVfVL4JfAqwGSbE/nSxLmJ5lfVf/R/xIlSRouPU0YSnJkkh/Q+RHsrwFL6ZyRSpK0yel1tu37gIOAO6pqdzpfz3dV36qSJGmI9Rqeq5vPYG6WZLOqugJwVqwkaZPU60dVHkoyH/gG8Mkk9wEr+1eWJEnDq9czz6OAXwGnAl8G7gSO7FdRkiQNs15/VWVlkl2Bp1XV+UmeBMzqb2mSJA2nXmfbvgH4PPCRZtGOwEX9KkqSpGHW62XbtwDPBx4GqKofANv3qyhJkoZZr+H5SFX9eu2DJLP57U+USZK0Sek1PL+W5F3A3CSH0vktzy/2ryxJkoZXr+F5OnA/cBPwRuBS4N39KkqSpGG2vl9V2aWq/qOqHgPOaW6SJG3S1nfm+ZsZtUku7HMtkiSNhPWFZ7ruL+xnIZIkjYr1hWet474kSZus9X3D0L5JHqZzBjq3uU/zuKrqyX2tTpKkIbS+H8P2K/gkSZqk14+qSJKkhuEpSVJLhqckSS0ZnpIktWR4SpLUkuEpSVJLhqckSS0ZnpIktWR4SpLUkuEpSVJLhqckSS0ZnpIktWR4SpLUkuEpSVJLhqckSS0ZnpIktWR4SpLUkuEpSVJLhqckSS0ZnpIktWR4SpLUkuEpSVJLhqckSS0ZnpIktTR70AVsiFWrH2W307806DI22ml7r+GEMegD4LzD5g26BEmaMZ55SmPuda97Hdtvvz177bXXoEuRxkZfwzPJKUluS3Jhkm8neSTJO7rWb57kO0luSHJLkvf2sx5pU3TCCSfw5S9/edBlSGOl35dt3wy8GPg1sCtw9KT1jwAvrKoVSeYA30xyWVVd1ee6pE3GIYccwtKlSwddhjRW+nbmmeQsYCFwGXBMVX0XWN09pjpWNA/nNLfqV02SJE2Hvp15VtXJSQ4DllTVA+sal2QWcC2wB/C/q+rqdYw7CTgJYNttt+OMvdf0oeqZtcPczqShcbBixQomJiYGXca0GJdeuvv42c9+xsqVK0e2r3E5JjBevWzKBj7btqoeBfZLshXwz0n2qqqbpxh3NnA2wC4L96gzbxp46RvttL3XMA59QGe27eLFiwddxrSYmJgYi166+1i6dCnz5o3uMRqXYwLj1cumbGhm21bVQ8AVwGGDrkWSpMcz0PBMsl1zxkmSucChwPcHWZM0bl796lfzvOc9j9tvv52ddtqJc889d9AlSSNvRq4ZJnkKcA3wZOCxJKcCi4AFwPnN+56bAZ+tqktmoiZpU/GpT31q0CVIY6ev4VlVu3U93GmKITcCz+pnDZIkTbeRnK0yd84sbv/AEYMuY6NNTEyw9JjFgy5jWjh7UNKmZGgmDEmSNCoMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKklw1OSpJYMT0mSWjI8JUlqyfCUJKmlVNWga2gtyXLg9kHXMQ22BR4YdBHTxF6Gz7j0AfYyHXatqu0GsN+xNHvQBWyg26vq2YMuYmMluWYc+gB7GUbj0gfYi4aPl20lSWrJ8JQkqaVRDc+zB13ANBmXPsBehtG49AH2oiEzkhOGJEkapFE985QkaWAMT0mSWhra8ExyWJLbk/wwyelTrH9iks80669OstvMV9mbHno5Icn9Sa5vbq8fRJ3rk+RjSe5LcvM61ifJPzR93phk/5musVc99LI4yS+7jskZM11jL5LsnOSKJLcmuSXJ26YYMxLHpcdeRuW4bJ7kO0luaHp57xRjRuY1TFOoqqG7AbOAO4GFwBOAG4BFk8a8GTiruf8q4DODrnsjejkB+MdB19pDL4cA+wM3r2P9S4DLgAAHAVcPuuaN6GUxcMmg6+yhjwXA/s39LYA7pvj/aySOS4+9jMpxCTC/uT8HuBo4aNKYkXgN8zb1bVjPPJ8L/LCqflRVvwY+DRw1acxRwPnN/c8DL0qSGayxV730MhKq6uvALx5nyFHAx6vjKmCrJAtmprp2euhlJFTVPVV1XXN/OXAbsOOkYSNxXHrsZSQ0f9Yrmodzmtvk2Zmj8hqmKQxreO4ILOt6fDe/+5foN2Oqag3wS2CbGamunV56AXhFc0nt80l2npnSpl2vvY6K5zWX3S5L8sxBF7M+zWW/Z9E5y+k2csflcXqBETkuSWYluR64D/hqVa3zuAz5a5imMKzhuan5IrBbVe0DfJXf/mtUg3Mdne8C3Rf4X8BFA67ncSWZD1wInFpVDw+6no2xnl5G5rhU1aNVtR+wE/DcJHsNuiZNn2ENz58A3WdfOzXLphyTZDawJfDzGamunfX2UlU/r6pHmocfBQ6YodqmWy/HbSRU1cNrL7tV1aXAnCTbDrisKSWZQydsPllVX5hiyMgcl/X1MkrHZa2qegi4Ajhs0qpReQ3TFIY1PL8LPC3J7kmeQOfN9IsnjbkYeE1z/5XA5VU1jN/4sN5eJr3/9DI67/WMoouB45vZnQcBv6yqewZd1IZI8pS17z8leS6dvytD98LW1HgucFtVfXAdw0biuPTSywgdl+2SbNXcnwscCnx/0rBReQ3TFIbyV1Wqak2S/wZ8hc5s1Y9V1S1J/gq4pqoupvOX7IIkP6Qz8eNVg6t43Xrs5ZQkLwPW0OnlhIEV/DiSfIrObMdtk9wN/CWdiRBU1VnApXRmdv4Q+BXw2sFUun499PJK4E1J1gCrgFcN6Qvb84HjgJua99cA3gXsAiN3XHrpZVSOywLg/CSz6AT8Z6vqklF8DdPU/Ho+SZJaGtbLtpIkDS3DU5KklgxPSZJaMjwlSWrJ8JQkqaWh/KiKNAhJHgVu6lp0dFUtHVA5koaYH1WRGklWVNX8Gdzf7OY7TSWNGC/bSj1KsiDJ15vfkbw5yQua5Yclua75svJ/b5ZtneSi5sv+r0qyT7P8PUkuSHIlnQ/Iz0ryP5N8txn7xgG2KKlHXraVfmtu1zfb3FVVfzRp/Z8CX6mq9zffHPOkJNsB5wCHVNVdSbZuxr4X+F5VHZ3khcDHgf2adYuAg6tqVZKT6Hxd3nOSPBG4Msm/VtVd/WxU0sYxPKXfWtX8Csa6fBf4WPPl5RdV1fVJFgNfXxt2VbX2N0IPBl7RLLs8yTZJntysu7iqVjX3/wDYJ8krm8dbAk8DDE9piBmeUo+q6utJDgGOAM5L8kHgwQ3Y1Mqu+wHeWlVfmY4aJc0M3/OUepRkV+DeqjqHzk/H7Q9cBRySZPdmzNrLtt8AjmmWLQYeWMfvbH6Fzhedz2nGPj3JvL42ImmjeeYp9W4x8GdJVgMrgOOr6v7mfcsvJNkMuI/Oz0+9h84l3hvp/JLJa6beJB8FdgOua35q637g6H42IWnj+VEVSZJa8rKtJEktGZ6SJLVkeEqS1JLhKUlSS4anJEktGZ6SJLVkeEqS1NL/B+I8XsLtQdDIAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from xgboost import plot_importance\n",
    "\n",
    "plot_importance(bst)\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "这个特征重要性的图有这么小"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7rc1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
